1 Introduction

Here, we will perform a trajectory analysis with the final annotation of the plasma cells (PC). We will use the TSCAN package and follow this tutorial.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(scater)
library(TSCAN)
library(tidyverse)

2.2 Define parameters

# Paths
path_to_obj <- "~/Desktop/PhD/PC_with_csMBC_and_AIDcells_analysis/data/PC_level_5_seurat_object_clean_with_new_csMBC_and_aid_and_with_annotated_clusters.rds"


# Colors
color_palette <-  c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D", "#5A5156", "#AA0DFE",   
                    "#F8A19F", "#F7E1A0", "#1C8356", "#FEAF16", "#822E1C", "#C4451C",   
                    "#1CBE4F", "#325A9B", "#F6222E", "#FE00FA", "#FBE426", "#16FF32", 
                    "black",   "#3283FE", "#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3",   
                    "#90AD1C", "#FE00FA", "#85660D", "#3B00FB", "#822E1C", "coral2", 
                    "#1CFFCE", "#1CBE4F", "#3283FE", "#FBE426", "#F7E1A0", "#325A9B",   
                    "#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD", "#FEAF16", "#683B79",   
                    "#B10DA1", "#1C7F93", "#F8A19F", "dark orange", "#FEAF16", "#FBE426",  
                    "Brown")

2.3 Read data

pc <- readRDS(path_to_obj)
DimPlot(pc, group.by = "names_level_5_clusters", cols = color_palette)

3 Infer pseudotime

# Convert to SingleCellExperiment
pc_sce <- as.SingleCellExperiment(pc)
colLabels(pc_sce) <- pc_sce$names_level_5_clusters


# Calculate minimum spanning tree (MST)
by_cluster <- aggregateAcrossCells(pc_sce, ids = colLabels(pc_sce))
centroids <- reducedDim(by_cluster, "HARMONY")[, 1:30]
mst <- createClusterMST(centroids, clusters = NULL)
mst
## IGRAPH 9ef6883 UNW- 27 26 -- 
## + attr: name (v/c), coordinates (v/x), weight (e/n), gain (e/n)
## + edges from 9ef6883 (vertex names):
##  [1] csMBC                                      --Early MBC-derived PC precursors                  DZ-derived PC precursors                   --GZ-GC B cells                                    DZ-derived PC precursors                   --IgM+ Early PC precursors                         Early MBC-derived PC precursors            --MBC-derived PC precursors                        IgD PC precursor                           --LZ-derived IgG+ PC precursor 1                   IGHV3-20/43 expressing PCs                 --LZ-derived IgG+ PC precursor 3                   IgM+ Early PC precursors                   --LZ-derived early PC precursors (PRDM1/XBP1/IRF4) IgM+ Early PC precursors                   --Pre-post prolif. Plasmablasts 1                  IgM+ PC precursors 1                       --IgM+ PC precursors 2                             IgM+ PC precursors 2                       --IgM+ PC precursors 3                             IgM+ PC precursors 3                       --IgM+ PC precursors 4                             IgM+ PC precursors 3                       --MBC-derived PC precursors                       
## [13] IgM+ PC precursors 4                       --Mature IgM+                                      LZ-derived early PC precursors (PRDM1)     --LZ-derived early PC precursors (PRDM1/XBP1)      LZ-derived early PC precursors (PRDM1)     --LZ-GC B cells                                    LZ-derived early PC precursors (PRDM1/XBP1)--LZ-derived early PC precursors (PRDM1/XBP1/IRF4) LZ-derived IgG+ PC precursor 1             --LZ-derived IgG+ PC precursor 2                   LZ-derived IgG+ PC precursor 1             --Pre-post prolif. Plasmablasts 1                  LZ-derived IgG+ PC precursor 2             --LZ-derived IgG+ PC precursor 3                   LZ-derived IgG+ PC precursor 3             --Mature IgG+ 1                                    Mature IgA+ 1                              --Mature IgG+ 1                                    Mature IgA+ 1                              --Mature IgM+                                      Mature IgA+ 2                              --Mature IgG+ 2                                    Mature IgG+ 1                              --Mature IgG+ 2                                   
## [25] Pre-post prolif. Plasmablasts 1            --Pre-post prolif. Plasmablasts 2                  Pre-post prolif. Plasmablasts 2            --Proliferative plasmablasts
line_data <- reportEdges(
  by_cluster,
  mst = mst,
  clusters = NULL,
  use.dimred = "UMAP"
)
plotUMAP(pc_sce, colour_by = "names_level_5_clusters") + 
  geom_line(data = line_data, mapping = aes(x = UMAP_1, y = UMAP_2, group = edge))

# Predict pseudotime
map_tscan <- mapCellsToEdges(
  reducedDim(pc_sce, "HARMONY")[, 1:30],
  mst = mst,
  clusters = colLabels(pc_sce)
)
tscan_pseudo <- orderCells(map_tscan, mst, start = "LZ-GC B cells")
head(tscan_pseudo)
## class: PseudotimeOrdering 
## dim: 6 7 
## metadata(1): start
## pathStats(1): ''
## cellnames(6): bw94nf57_vm85woki_AAACGAACATCCGAGC-1 bw94nf57_vm85woki_AAAGGATCATGACTAC-1 ... bw94nf57_vm85woki_AAAGTGAGTTACGGAG-1 bw94nf57_vm85woki_AAATGGATCCGATTAG-1
## cellData names(4): left.cluster right.cluster left.distance right.distance
## pathnames(7): GZ-GC B cells IgD PC precursor ... IgM+ PC precursors 1 csMBC
## pathData names(0):
common_pseudo <- averagePseudotime(tscan_pseudo) 
plotUMAP(pc_sce, colour_by = I(common_pseudo)) +
  geom_line(data=line_data, mapping=aes(x=UMAP_1, y=UMAP_2, group=edge))

4 Trajectory-based DEA

# Fit a model for each gene
pseudotime_igha <- pathStat(tscan_pseudo)[, "Mature IgA+ 2"]
plotUMAP(pc_sce, colour_by = I(pseudotime_igha)) +
  geom_line(data=line_data, mapping=aes(x=UMAP_1, y=UMAP_2, group=edge))

pseudo <- testPseudotime(pc_sce, pseudotime = pseudotime_igha)
pseudo$symbol <- rownames(pseudo)
pseudo[order(pseudo$p.value), ]
## DataFrame with 37378 rows and 4 columns
##                  logFC   p.value       FDR      symbol
##              <numeric> <numeric> <numeric> <character>
## RPL22      -0.01540940         0         0       RPL22
## ENO1       -0.01226576         0         0        ENO1
## PRDM2      -0.00784347         0         0       PRDM2
## CAPZB      -0.01089302         0         0       CAPZB
## DDOST       0.00470137         0         0       DDOST
## ...                ...       ...       ...         ...
## AC171558.1           0       NaN       NaN  AC171558.1
## AC133551.1           0       NaN       NaN  AC133551.1
## AC136612.1           0       NaN       NaN  AC136612.1
## AC136616.1           0       NaN       NaN  AC136616.1
## AC023491.2           0       NaN       NaN  AC023491.2
# Make a copy of SCE to include pseudotimes to colData
pc_sce2 <- pc_sce
pc_sce2$pseudotime_main_trajectory <- pseudotime_igha
sorted <- pseudo[order(pseudo$p.value), ]
sorted_up <- as.data.frame(sorted[sorted$logFC > 0, ])
sorted_down <- as.data.frame(sorted[sorted$logFC < 0, ])
rowData(pc_sce2)$symbol <- rownames(pc_sce2)


# Plot relevant TF
tf <- c("BCL6", "PRDM1", "PRDM2", "XBP1", "IRF4", "IRF1", "IRF8")
plotExpression(
  pc_sce2,
  features = tf,
  swap_rownames = "symbol",
  x = "pseudotime_main_trajectory",
  colour_by = "names_level_5_clusters"
) +
  theme(legend.position = "none")

plotExpression(
  pc_sce2,
  features = str_subset(sorted_up$symbol, "^IRF"),
  swap_rownames = "symbol",
  x = "pseudotime_main_trajectory",
  colour_by = "names_level_5_clusters"
) +
  geom_smooth() +
  theme(legend.position = "none")

4.1 GSEA as a function of pseudotime